---
title: "Bwindi Law Review Tables & Figures"
author: "Mark Buntaine"
header-includes:
   - \usepackage{arydshln}
   - \usepackage{rotating}
   - \usepackage{booktabs}

date: "3/12/2020"
output: pdf_document
---

```{r setup, include=FALSE, message=FALSE}
knitr::opts_chunk$set(echo = FALSE)

library(dplyr)
library(boot)
library(xtable)
library(grid)
library(gridExtra)
library(ggplot2)
library(stargazer)
library(estimatr)
library(lfe)
library(tidyr)
library(here)
```

```{r functions, include=FALSE}

diff_summ <- function(data,outcome,treat,sims){

outcome <- enquo(outcome)
treat <- enquo(treat)
  
my.mean = function(x, indices) {
return( mean( x[indices], na.rm=T ) )
}

out.ctl <- data %>% filter(!!treat==0) %>% pull(!!outcome) %>% boot(.,my.mean,R=sims) %>% '[['('t')
out.trt <- data %>% filter(!!treat==1) %>% pull(!!outcome) %>% boot(.,my.mean,R=sims) %>% '[['('t')
diff.boot <- out.trt-out.ctl

data %>% group_by(!!treat) %>%
  summarise(mean=mean(!!outcome, na.rm=T),
            boot.se=sd(boot(!!outcome,my.mean,R=sims)$t),
            n=n()) %>%
  arrange(desc(!!treat)) %>%
  rbind(list("diff",.$mean[1]-.$mean[2],sd(diff.boot),.$n[1]+.$n[2])) %>%
  mutate(status="pooled") %>%
  mutate(prt=paste(sprintf("%.2f", mean)," (se=", sprintf("%.3f", boot.se),", n=",n,")",sep="")) %>%
  dplyr::select(status,!!treat,prt)

}

# a <- rbinom(n=500, size=1, prob=0.2)
# b <- rbinom(n=500, size=1, prob=0.5)
# c <- c(a,b)
# trt <- c(rep(0,500),rep(1,500))
# dta <- tibble(c,trt)
# diff_summ(data=dta,outcome=c,treat=trt,sims=500)


split_summ <- function(data,outcome,treat,sims,split.var){

treat <- enquo(treat)
outcome <- enquo(outcome)
  
split.var <- enquo(split.var)
split.uniq.val <- data %>% pull(!!split.var) %>% na.omit() %>% unique()
  
top <- data %>% filter(!!split.var==split.uniq.val[1])
bottom <- data %>% filter(!!split.var==split.uniq.val[2])

top.summ <- diff_summ(data=top,outcome=!!outcome,treat=!!treat,sims=500)
top.summ <- top.summ %>% mutate(status=split.uniq.val[1])

bot.summ <- diff_summ(data=bottom,outcome=!!outcome,treat=!!treat,sims=500)
bot.summ <- bot.summ %>% mutate(status=split.uniq.val[2])

#split.val <- tibble(grouping=c(rep(split.uniq.val[1],3), rep(split.uniq.val[2],3)))

return(bind_rows(top.summ,bot.summ))

}

# dta <- dta %>% mutate(f=sample(c("F","M"),size=1000,replace=T),income=sample(c("high","low"),size=1000,replace=T))
# split_summ(data=dta,outcome=c,treat=trt,sims=500,split.var=income)

```

```{r data-read-setup-wd, warning=FALSE, message=FALSE, include=FALSE}
#Note: this copies code from the NOT RUN 'data-read' chunk in non-replication file for direct execution

# source2("~/Google Drive/Bwindi project/Code/WD_Replication/BDD_WD2018_replication.R", start=1, end=325) #participation phase data setup ----

root <- "~/Google Drive/Bwindi project/Code/WD_Replication"
#Note: all code and data files should be placed in a single directory and referenced in the line above
#Note: download from: https://doi.org/10.7910/DVN/15YF0R
#Note: change the file path "root" to the local working directory
#These files should be placed in a separate directory to the current replication files
setwd(root)

###Packages
library(interplot) #Version 0.1.5
library(interflex) #Version 1.0.3
library(lfe) #Version 2.6-2291
library(Lmoments) #Version 1.2-3, for inter.binning.90 function


###Functions

mean.imp <- function(data,var,cluster){
  for (i in 1:length(unique(data[,cluster]))){
    imp.value <- mean(data[data[,cluster]==unique(data[,cluster])[i], var], na.rm=T)
    data[data[,cluster]==unique(data[,cluster])[i] & is.na(data[,var]), var] <- imp.value
  }
  return(data)
} #This function automates mean imputation by cluster


felm.ri <- function(formula, dta, treat.var, sims, ...){
  require(lfe)
  
  ate <- coef(felm(formula, data=dta))[1]
  N <- felm(formula, data=dta)$N
  ate.samp.dist <- rep(NA,sims)
  
  for (i in 1:sims){
    dta[,treat.var] <- dta[,ncol(M)+i]
    ate.samp.dist[i] <- coef(felm(formula, data=dta))[1]
  }
  
  p.two.way <- sum(abs(ate)<abs(ate.samp.dist))/sims
  p.one.way.greater <- sum(ate<ate.samp.dist)/sims
  p.one.way.lesser <- sum(ate>ate.samp.dist)/sims
  se <- sd(ate.samp.dist)
  
  coun <- list("ate" = ate, "ate.samp.dist" = ate.samp.dist, "se"=se, "p.two.way" = p.two.way, "p.one.way.greater" = p.one.way.greater, "p.one.way.lesser" = p.one.way.lesser, "N" = N)
  return(coun)
}


lm.ri <- function(formula, dta, treat.var, sims, ...){
  require(lfe)
  
  ate <- coef(lm(formula, data=dta))[2]
  N <- nobs(lm(formula, data=dta))
  ate.samp.dist <- rep(NA,sims)
  
  for (i in 1:sims){
    dta[,treat.var] <- dta[,ncol(M)+i]
    ate.samp.dist[i] <- coef(lm(formula, data=dta))[2]
  }
  
  p.two.way <- sum(abs(ate)<abs(ate.samp.dist))/sims
  p.one.way.greater <- sum(ate<ate.samp.dist)/sims
  p.one.way.lesser <- sum(ate>ate.samp.dist)/sims
  se <- sd(ate.samp.dist)
  
  coun <- list("ate" = ate, "ate.samp.dist" = ate.samp.dist, "se"=se, "p.two.way" = p.two.way, "p.one.way.greater" = p.one.way.greater, "p.one.way.lesser" = p.one.way.lesser, "N" = N)
  return(coun)
}

source('inter.binning.90.R')
source('multiplot.R')

#Function to reset par() show that blocks can be run sequentially and without a plotting device
default.par <- par()

###Data input

B <- read.csv("baseline.csv", stringsAsFactors=FALSE)
E <- read.csv("endline.csv", stringsAsFactors=FALSE)
endline.subjects <- read.csv("endline_call_list.csv", row.names=1, stringsAsFactors=FALSE)[,1:4]
recruited.subjects <- read.csv("recruited_subjects.csv", stringsAsFactors=FALSE)
smsid <- read.csv("sms_received_list.csv", stringsAsFactors=FALSE)
Responsiveness <- read.csv("digital_responses.csv", stringsAsFactors=FALSE)
con.mat <- read.csv("contiguity_mat1.csv", stringsAsFactors=FALSE)
con2.mat <- read.csv("contiguity_mat2.csv", stringsAsFactors=FALSE)
con3.mat <- read.csv("contiguity_mat3.csv", stringsAsFactors=FALSE)
#Note the "con.mat" files have village "Kiziiba" manually removed because merged with "Buguro" at cleaning phase
spill.dist <- read.csv("spillover_matrix.csv", row.names=1)
loc.master <- read.csv("location_master.csv", stringsAsFactors=FALSE)
lc1.planning <- read.csv("planning_meeting_attendance.csv", stringsAsFactors=FALSE)
sbj_RI <- read.csv("assignment_ri.csv", stringsAsFactors=FALSE)


###Data setup

#Removing duplicate subject.id's in endline.subjects
endline.dup <- endline.subjects$Subject_ID_number[duplicated(endline.subjects$Subject_ID_number)]
see <- endline.subjects[endline.subjects$Subject_ID_number %in% endline.dup,]
see <- see[order(see$Subject_ID_number),] #Visual inspection: all subject id's are exact duplicates
M <- endline.subjects[!duplicated(endline.subjects$Subject_ID_number),] #Removing duplicated subject id's
length(unique(M$Subject_ID_number)) #This is the merge point, as all subject id's are unique

#Preparing baseline merge
B <- subset(B, B_subject_refuse_to_participate=="no") #Removing subjects who refused, no data
length(unique(B$Subject_ID_number)) #There are several duplicate subject id's in the raw baseline file
B.duplicated.list <- B$Subject_ID_number[duplicated(B$Subject_ID_number)] #List of duplicate subject id's in raw endline file
see <- B[B$Subject_ID_number %in% B.duplicated.list,]
see <- see[order(see$Subject_ID_number),]
B <- B[!(B$Subject_ID_number %in% B.duplicated.list),]
M <- merge(M,B, by="Subject_ID_number", all.x=TRUE) #still 1723 observations

#Preparing endline merge
E <- subset(E, E_1_Did_the_subject_refuse_to_p=="no") #Removing subjects who refused, no data
E.duplicated.list <- E$Subject_ID_number[duplicated(E$Subject_ID_number)] #List of duplicate subject id's in raw endline file
see <- E[E$Subject_ID_number %in% E.duplicated.list,]
see <- see[order(see$Subject_ID_number),]
E <- E[!(E$Subject_ID_number %in% E.duplicated.list),]
M <- merge(M,E, by="Subject_ID_number", all.x=TRUE) #still 1723 observations

#Merging treatment status and block from the assignment file
treat.dta <- recruited.subjects[,c(1,10,11,12)] #Getting subcounty, treatment status, and block
M <- merge(M,treat.dta, by="Subject_ID_number", all.x=TRUE) #still 1723 observations
M$subcounty.blocking <- ifelse(M$subcounty.blocking=="Rutenga", "Singles", M$subcounty.blocking) #Because of exclusion, Rutenga subcounty has only one village, moving to "Singles" block

#TextIt data
m7<-table(smsid$Subject_ID_number)
m7<-data.frame(m7)
names(m7)<-c("Subject_ID_number","participate.m7")
M <- merge(M,m7, by="Subject_ID_number", all.x=TRUE) #still 1723 observations

#Location cleaning
villages <- loc.master$village.updated
for (i in 1:length(villages)){
  villages[i] <- ifelse(loc.master$village.cleaned[i]=="", loc.master$village.updated[i], loc.master$village.cleaned[i])
}
villages[duplicated(villages)] #Checking for errors; looks good though some original errors resulted in villages with mixed treatment assignment (Kajumo, Omumbuga)

see <- subset(loc.master,excluded==0)
for (i in 1:length(villages)){
  villages[i] <- ifelse(see$village.cleaned[i]=="", see$village.updated[i], see$village.cleaned[i])
}
villages[duplicated(villages)] #Checking for errors; looks good though some original errors resulted in villages with mixed treatment assignment (Kajumo, Omumbuga)
see$final.id <- ifelse(see$location.id.cleaned=="",see$location.id,see$location.id.cleaned)
unique(see$final.id)

#unique(M$Updated.Village)[!(unique(M$Updated.Village) %in% villages)]
M$Updated.Village <- ifelse(M$Updated.Village=="Katooma","Katoma",M$Updated.Village)
M$Updated.Village <- ifelse(M$Updated.Village=="Kijumwa","Kijuma",M$Updated.Village)
M$Updated.Village <- ifelse(M$Updated.Village=="Kyogo (Kabale)","Kyogo (Rubanda)",M$Updated.Village)
M$Updated.Village <- ifelse(M$Updated.Village=="Mushija","Musheija",M$Updated.Village)
M$Updated.Village <- ifelse(M$Updated.Village=="Ndeego","Ndego",M$Updated.Village)

#Adding variables to probe spillover
spill.village.list <- names(spill.dist)
spill.village.list[!(spill.village.list %in% recruited.subjects$updated.village)] #list in spillover matrix that don't match

#con.village.list <- con.mat$Village..m.
#con.village.list[!(con.village.list %in% M$Updated.Village)] #list in contiguity matrix that don't match M village names
#con.village.list[!(con.village.list %in% loc.master$village.updated)] #list in contiguity matrix that don't match location master original names

#Cleaning the con.mat file for village names, see email thread "village names in matrices" from Jan-2017
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Kashija/ Rubuguri","Kashija",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Higabiro/ Rubuguri","Higabiro (Rubuguri)",con.mat$Village..m.)
#Higabiro (Rubuguri): this is an RS village, but we do not have any subjects from this village
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Nteeko","Nteko",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Nyakabungo( Rubimbwa)","Nyakabungo (Rubimbwa)",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Nyakabungo(Bushura)","Nyakabungo (Bushura)",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Ishaya (Mucusye)","Ishaya",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Kiziba","Kiziiba",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Bushegyenyi","Bushegenyi",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Kyibingo","Kibingo",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Kitahulira","Kitahurira (Hakikome)",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Nyakabungo","Nyakabungo (Kashekyera)",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Buzaniza","Buzaniro",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Mburamaizi","Mburameizi",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Bishanyu","Bishayu",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Katooma","Katoma",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Rwensaniro","Rwensanziro",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Rugando","Rugandu",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Ntunagamo","Ntungamo",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Nyakaranaga","Nyakaranga",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Kacwamuhoro","Kachwamuhoro",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Kitahurira","Kitahurira (Kashasha)",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Ndeego","Ndego",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="kagogo","Kagogo",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Byamihanda","Ryamihanda",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Mushija","Musheija",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Kijumwa","Kijuma",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Kiziiba","Bugoro",con.mat$Village..m.)
con.mat$Village..m. <- ifelse(con.mat$Village..m.=="Kyogo (Kabale)","Kyogo (Rubanda)",con.mat$Village..m.)

#Cleaning the con2.mat file for village names, see email thread "village names in matrices" from Jan-2017
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Kashija/ Rubuguri","Kashija",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Higabiro/ Rubuguri","Higabiro (Rubuguri)",con2.mat$Village..m.)
#Higabiro (Rubuguri): this is an RS village, but we do not have any subjects from this village
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Nteeko","Nteko",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Nyakabungo( Rubimbwa)","Nyakabungo (Rubimbwa)",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Nyakabungo(Bushura)","Nyakabungo (Bushura)",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Ishaya (Mucusye)","Ishaya",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Kiziba","Kiziiba",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Bushegyenyi","Bushegenyi",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Kyibingo","Kibingo",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Kitahulira","Kitahurira (Hakikome)",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Nyakabungo","Nyakabungo (Kashekyera)",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Buzaniza","Buzaniro",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Mburamaizi","Mburameizi",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Bishanyu","Bishayu",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Katooma","Katoma",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Rwensaniro","Rwensanziro",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Rugando","Rugandu",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Ntunagamo","Ntungamo",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Nyakaranaga","Nyakaranga",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Kacwamuhoro","Kachwamuhoro",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Kitahurira","Kitahurira (Kashasha)",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Ndeego","Ndego",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="kagogo","Kagogo",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Byamihanda","Ryamihanda",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Mushija","Musheija",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Kijumwa","Kijuma",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Kiziiba","Bugoro",con2.mat$Village..m.)
con2.mat$Village..m. <- ifelse(con2.mat$Village..m.=="Kyogo (Kabale)","Kyogo (Rubanda)",con2.mat$Village..m.)

#Cleaning the con3.mat file for village names, see email thread "village names in matrices" from Jan-2017
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Kashija/ Rubuguri","Kashija",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Higabiro/ Rubuguri","Higabiro (Rubuguri)",con3.mat$Village..m.)
#Higabiro (Rubuguri): this is an RS village, but we do not have any subjects from this village
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Nteeko","Nteko",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Nyakabungo( Rubimbwa)","Nyakabungo (Rubimbwa)",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Nyakabungo(Bushura)","Nyakabungo (Bushura)",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Ishaya (Mucusye)","Ishaya",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Kiziba","Kiziiba",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Bushegyenyi","Bushegenyi",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Kyibingo","Kibingo",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Kitahulira","Kitahurira (Hakikome)",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Nyakabungo","Nyakabungo (Kashekyera)",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Buzaniza","Buzaniro",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Mburamaizi","Mburameizi",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Bishanyu","Bishayu",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Katooma","Katoma",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Rwensaniro","Rwensanziro",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Rugando","Rugandu",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Ntunagamo","Ntungamo",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Nyakaranaga","Nyakaranga",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Kacwamuhoro","Kachwamuhoro",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Kitahurira","Kitahurira (Kashasha)",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Ndeego","Ndego",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="kagogo","Kagogo",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Byamihanda","Ryamihanda",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Mushija","Musheija",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Kijumwa","Kijuma",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Kiziiba","Bugoro",con3.mat$Village..m.)
con3.mat$Village..m. <- ifelse(con3.mat$Village..m.=="Kyogo (Kabale)","Kyogo (Rubanda)",con3.mat$Village..m.)

#Cleaning loc.master names into one vector
loc.master$village.final <- ifelse(loc.master$village.cleaned=="", loc.master$village.updated, loc.master$village.cleaned)
loc.master$village.final <- ifelse(loc.master$village.final=="Kikangaga ", "Kikangaga", loc.master$village.final)

#Checking the location.id cleaning in all files
con.mat$Village..m.[!(con.mat$Village..m. %in% loc.master$village.final)] #Notes: only Higabiro (Rubuguri), no subjects in study
loc.master$village.final[!(loc.master$village.final %in% con.mat$Village..m.)] #Notes: "Kinaba" "Kinaba" "Kyogo"  "Rukere" all excluded
unique(M$Updated.Village) %in% loc.master$village.final #Note: all appear in cleaned loc.master
unique(M$Updated.Village)[!(unique(M$Updated.Village) %in% con.mat$Village..m.)] #"Kinaba" does not specify which one, remove Kinaba from dataframe?

#Correcting Kinaba village names (redistricted), for those we were able to reach, see "Corrected villages.xls" and email thread "Bwindi Village Confusion"
M$Updated.Village[M$Subject_ID_number %in% c(2581,2583,2587)] <- "Nyabiha"
M$Updated.Village[M$Subject_ID_number %in% c(2518)] <- "Kyabworo"
M <- M[M$Updated.Village!="Kinaba",] #Removing "Kinaba" from dataframe per exclusion from revenue sharing during '16/'17 round

#Creating spillover indicator in M
treated.villages <- loc.master$village.final[loc.master$treat==1]
#Note: rows/columns in con.mat must be ordered identically, code below checks
#col.n <- colnames(con.mat)[-1]
#ex <- dataframe(con.mat$Village..m.,col.n)
#con.mat$Village..m.[48] <- "Kiziba" #Correction discussed in 2/20/17 email to Colleen

for (i in 1:nrow(M)){
  row.test <- con.mat[M$Updated.Village[i]==con.mat$Village..m.,-1]==1
  row.test2 <- con2.mat[M$Updated.Village[i]==con2.mat$Village..m.,-1]==1
  row.test3 <- con3.mat[M$Updated.Village[i]==con3.mat$Village..m.,-1]==1
  con.villages <- con.mat$Village..m.[row.test]
  con2.villages <- con2.mat$Village..m.[row.test2]
  con3.villages <- con3.mat$Village..m.[row.test3]
  M$S1[i] <- sum(con.villages %in% treated.villages)
  M$S2[i] <- sum(con2.villages %in% treated.villages)
  M$S3[i] <- sum(con3.villages %in% treated.villages)
  
}
M$S1 <- as.factor(M$S1)
M$S2 <- as.factor(M$S2)
M$S3 <- as.factor(M$S3)

#Treatment Density (number of subjects per village)
for (i in 1:nrow(M)){
  sub <- subset(M, Updated.Village==M$Updated.Village[i])
  M$subjects.per.village[i] <- nrow(sub)
}

#Attrition
M$attr <- ifelse(is.na(M$E_id), 1, 0)

#Setting up RI object
source('outcome_var_mean_imp_setup.R')
sbj_RI <- sbj_RI[,c(1,5:ncol(sbj_RI))] #selecting only the treatment status to avoid introducing duplicate data to M

```

```{r data-read-setup-rap, warning=FALSE, message=FALSE, include=FALSE}

root <- "~/Google Drive/Bwindi project/Code/NULR_Replication/"
#Note: all code and data files should be placed in a single directory and referenced in the line above
#Note: change the file path "root" to the local working directory
setwd(root)

simn <- 10000 #number of RI runs, decrease significantly for fast compilation

audit.v <- read.csv("./Audits_Village_Analysis.csv", stringsAsFactors=FALSE)
audit.c <- read.csv("./Audits_Component_Analysis.csv", stringsAsFactors=FALSE)

survey <- read.csv("./survey_anon.csv")
#note: respodent_name and mobile_phone replaced with unique random values to protect subject anonymity

village.sample <- read.csv("./Village_Sample_MASTER.csv", stringsAsFactors=FALSE)
#note: the village id's come from the master revenue sharing list from UWA.

p2.assigned <- read.csv("./p2samp.csv", stringsAsFactors=FALSE)[,-1] 
#note: this is a modification of "loc.master" with random assignments as "treat2"

p2.ri <- read.csv("./p2_ri.csv", stringsAsFactors=FALSE)[,-1] 
#note: this is a file based on "loc.master" with 10000 random assignment draws

vlist.ordered.dta <- read.csv("./BwindiVillageOrder_190201_JN.csv", stringsAsFactors=FALSE)
#note: this is the list of villages in contiguous order

se.boot <- function(sims=simn,seed=201,vector){
  store <- rep(NA,sims)
  set.seed(seed)
  for (i in 1:sims){
    store[i] <- mean(sample(vector, replace=T))
  }
  return(sd(store))
}


se.boot.cluster <- function(sims=simn,seed=201,vector,clusters){
  store <- rep(NA,sims)
  set.seed(seed)

  v.OK <- !is.na(vector)
  vector <- vector[v.OK]
  clusters <- clusters[v.OK]
  
  clusters_index <- unique(clusters)
  
  for (i in 1:sims){
    sam_clus <- sample(clusters_index, length(clusters_index), replace=TRUE)
    boot_dat <- NULL
    
    for(j in 1:length(sam_clus)){
      cc <- vector[clusters==sam_clus[j]]
      boot_dat <- c(boot_dat, cc)
    }
    
    store[i] <- mean(boot_dat)
  }
  
  return(sd(store))
}


##Function: randomization inference for difference in means
ri.diff <- function(sims, ri.obj, outcome.var, treat.var, treat.ri.var, info.cols){
  ate <- mean(ri.obj[ri.obj[,treat.var]==1,outcome.var], na.rm=TRUE) - mean(ri.obj[ri.obj[,treat.var]==0,outcome.var], na.rm=TRUE)
  store <- rep(NA,sims)
  
  for (i in 1:sims){
    ri.obj[,treat.ri.var] <- ri.obj[,info.cols+i]
    store[i] <- mean(ri.obj[ri.obj[,treat.ri.var]==1,outcome.var], na.rm=TRUE) - mean(ri.obj[ri.obj[,treat.ri.var]==0,outcome.var], na.rm=TRUE)
  }
  
  outcome <- outcome.var
  se.null <- sd(store)
  p.two.way <- sum(abs(ate)<abs(store))/sims
  p.one.way.greater <- sum(ate<=store)/sims
  p.one.way.lesser <- sum(ate>=store)/sims
  
  out <- list("outcome" = outcome, "ate" = ate, "rand.dist" = store, "se.null"=se.null, "p.two.way" = p.two.way, "p.one.way.greater" = p.one.way.greater, "p.one.way.lesser" = p.one.way.lesser)
  return(out)
}


##Function: randomization inference for felm()
felm.ri <- function(formula, dta, treat.var, rand.ob, rand.ob.info.cols, sims, ...){
  require(lfe)
#1. data and rand.ob must have rows organized in identical order")
#2. treat.var should be first right-side entry in formula")
#3. join.var must have identical name between data and rand.ob")
  
  ate <- coef(felm(formula, data=dta))[1]
  N <- felm(formula, data=dta)$N
  ate.samp.dist <- rep(NA,sims)
  
  for (i in 1:sims){
    dta[,treat.var] <- rand.ob[,rand.ob.info.cols+i]
    
    ate.samp.dist[i] <- coef(felm(formula, data=dta))[1]
  }
  
  p.two.way <- sum(abs(ate)<abs(ate.samp.dist))/sims
  p.one.way.greater <- sum(ate<ate.samp.dist)/sims
  p.one.way.lesser <- sum(ate>ate.samp.dist)/sims
  se.null <- sd(ate.samp.dist)
  
  coun <- list("ate" = ate, "ate.samp.dist" = ate.samp.dist, "se.null"=se.null, "p.two.way" = p.two.way, "p.one.way.greater" = p.one.way.greater, "p.one.way.lesser" = p.one.way.lesser, "N" = N)
  return(coun)
}

coef.print <- function(vector){
  sprintf("%.3f",round(vector,3))
}

coef.print2 <- function(vector){
  sprintf("%.2f",round(vector,2))
}

se.print <- function(vector){
  paste("(",sprintf("%.3f",round(vector,3)),")",sep="")
}

se.print2 <- function(vector){
  paste("(",sprintf("%.2f",round(vector,2)),")",sep="")
}

p.print <- function(vector){
  paste("p=",sprintf("%.3f", round(vector,3)),sep="")
}

tab_cell <- function(ri_obj){
  return(tribble(~h,coef.print(ri_obj$ate),se.print(ri_obj$se.null), p.print(ri_obj$p.one.way.greater)))
}

tab_cell_desc <- function(desc,desc.se,i){
  return(tribble(~d,coef.print(desc[i]),se.print(desc.se[i]),""))
}

empty_cell <- tribble(~h, "--", "(--)", "")

###Location ID compile and cleaning -----

village.sample.no.dup <- village.sample[!(duplicated(village.sample$village.name) | duplicated(village.sample$village.name, fromLast = TRUE)),]
#Note: will have to work on all duplicate villages manually

p2.assigned$village.id <- NA
for (i in 1:nrow(p2.assigned)){
  p2.assigned$village.id[i] <- ifelse((sum(p2.assigned$village.final[i]==p2.assigned$village.final)==1 & p2.assigned$village.final[i] %in% village.sample.no.dup$village.name),
                                      village.sample.no.dup$village.id[p2.assigned$village.final[i]==village.sample.no.dup$village.name],
                                      NA)
}

#Filling in the other village.id's manually using fuzzy matches from village.sample
p2.assigned$village.id <- ifelse(p2.assigned$location.id.cleaned=="Rubanda.Ruhija.Kashekyera.Bitanwa", 62, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Bugoro" & !is.na(p2.assigned$treat2), 48, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Buhoma", 10, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Ishaya", 49, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Kajumo" & !is.na(p2.assigned$treat2), 35, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Kanyabuhama", 46, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Kanyashogi", 17, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Kigaga", 25, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Kijuma", 47, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Kitahurira (Hakikome)", 20, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Kitahurira (Kashasha)", 70, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Kyogo (Kanungu)", 27, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Kyogo (Rubanda)", 59, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Mburameizi", 53, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Ndego", 74, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Nyakabungo (Bushura)", 36, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Nyakabungo (Kashekyera)", 60, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Nyakabungo (Rubimbwa)", 40, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Nyamabale", 77, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Nyamatsinda", 89, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Omumbuga" & !is.na(p2.assigned$treat2), 45, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Rushaga", 86, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Rwensanziro", 50, p2.assigned$village.id)

#Village id's not in sample frame used for assignment: 
#26. Bushegenyi (Ngaara) -- previously indicated as excluded from revenue-sharing
#83. Higabiro (Rubuguri) -- not part of sample frame
#Note: MB sent an email to JN about this on 1/12/18

#Copied identical cleaning for RI object from "p2.assigned" above
p2.ri$village.id <- NA
p2.ri <- p2.ri[,c(1:18,10019,19:10018)] #moving village.id into preliminary columns

for (i in 1:nrow(p2.ri)){
  p2.ri$village.id[i] <- ifelse((sum(p2.ri$village.final[i]==p2.ri$village.final)==1 & p2.ri$village.final[i] %in% village.sample.no.dup$village.name),
                                      village.sample.no.dup$village.id[p2.ri$village.final[i]==village.sample.no.dup$village.name],
                                      NA)
}

#Filling in the other village.id's manually using fuzzy matches from village.sample
#Note: have to use !is.na() from p2.assigned on duplicated; checked that final village names perfectly match
p2.ri$village.id <- ifelse(p2.ri$location.id.cleaned=="Rubanda.Ruhija.Kashekyera.Bitanwa", 62, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Bugoro" & !is.na(p2.assigned$treat2), 48, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Buhoma", 10, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Ishaya", 49, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Kajumo" & !is.na(p2.assigned$treat2), 35, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Kanyabuhama", 46, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Kanyashogi", 17, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Kigaga", 25, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Kijuma", 47, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Kitahurira (Hakikome)", 20, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Kitahurira (Kashasha)", 70, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Kyogo (Kanungu)", 27, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Kyogo (Rubanda)", 59, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Mburameizi", 53, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Ndego", 74, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Nyakabungo (Bushura)", 36, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Nyakabungo (Kashekyera)", 60, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Nyakabungo (Rubimbwa)", 40, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Nyamabale", 77, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Nyamatsinda", 89, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Omumbuga" & !is.na(p2.assigned$treat2), 45, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Rushaga", 86, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Rwensanziro", 50, p2.ri$village.id)


###Data Cleaning -----

##Adding village ids (per field sheet of projects)

audit.v$village.id <- NA
audit.v <- audit.v[,c(1:5,27,6:26)] #Reording columns to put "village.id" next to "village"

#Loop for unique matches
for (i in 1:nrow(audit.v)){
  audit.v$village.id[i] <- ifelse((sum(audit.v$village[i]==village.sample$village.name)==1 & audit.v$village[i] %in% village.sample.no.dup$village.name),
                                      village.sample.no.dup$village.id[audit.v$village[i]==village.sample.no.dup$village.name],
                                      NA)
}

#Filling in the other village.id's manually using fuzzy matches with village.sample
audit.v$village.id <- ifelse(audit.v$village=="Buhoma", 10, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Kanyabuhama", 46, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Ruboona", 12, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Nyakabungo ( Rubibwa )", 40, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Nyakabungo bushura", 36, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Ishaya ( Mucusye )", 49, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Bugolo", 48, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Kanyashogi", 17, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Kyogo (mpungu )", 27, audit.v$village.id) #Mpungu is the sub-county
audit.v$village.id <- ifelse(audit.v$village=="Mburameizi", 53, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Kyogo rubanda", 59, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Kigaga", 25, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Nyakabungo kashekyera", 60, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Ndengo", 74, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Nyambare", 77, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Rushaga", 86, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Rwensanziro", 50, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Nyakabungo ( Kashekyera )", 60, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Kitahurira", 20, audit.v$village.id) #Matched using project components from "audit.c"
audit.v$village.id <- ifelse(audit.v$village=="Kyitahurira", 70, audit.v$village.id) #Matched using project components from "audit.c"
#Checked: all villages in sample frame are accounted for and have a village.id

#Adding treatment assignment for accountability phase
p2.assigned <- p2.assigned[!is.na(p2.assigned$village.id),]
for (i in 1:nrow(audit.v)){
  audit.v$treat[i] <- ifelse(length(p2.assigned$treat2[p2.assigned$village.id==audit.v$village.id[i]])>0,
                             p2.assigned$treat2[p2.assigned$village.id==audit.v$village.id[i]], NA)
}
#This brings in the treatment assigned for the accountability phase, which should allow descriptive statistics to be computed

#Pulls treatment, village ID, district, parish, and subcounty information into the audit.c file
audit.c$treat <- NA
audit.c$village_id <- NA
audit.c$district <- NA
audit.c$subcounty <- NA
audit.c$parish <- NA

for (i in 1:nrow(audit.c)) {audit.c$treat[i] <- audit.v$treat[audit.c$X_parent_index[i]]}
for (i in 1:nrow(audit.c)) {audit.c$village_id[i] <- audit.v$village.id[audit.c$X_parent_index[i]]}

for (i in 1:nrow(audit.c)) {
  for (j in 1:nrow(p2.assigned)) {
    if (audit.c$village_id[i] == p2.assigned$village.id[j]) {
      audit.c$district[i] <- p2.assigned$district.orig[j]; 
      audit.c$subcounty[i] <- p2.assigned$subcounty.orig[j]; 
      audit.c$parish[i] <- p2.assigned$parish.orig[j];
      audit.c$sc.block[i] <- p2.assigned$sc.block[j]
    }
  }
} #Note: loop involves overwriting j many times, but works quickly
#Note: NAs caused by measurements taken outside of sample frame
#Note: confirmed that no slight mispellings in district, subcounty, or parish strings

#Creates dummy variable for whether the component is PAM (1 if it is PAM)
audit.c$PAM <- NA

for (i in 1:nrow(audit.c)) {
  ifelse(audit.c$PAM_part[i] == "yes", audit.c$PAM[i] <- 1, audit.c$PAM[i] <- 0)
}

###Combining audits of same villages separated
#Note: using audit.c as the main file for analysis, it is not necessary to merge separate audit.v entries (other than for later analysis of information source)


###Removing training or incorrect entries
audit.c <- audit.c[,c(69,68,1:67,70:74)] #Re-arranging columns for convenience, moving village_id to front

#Creating objects to code shared projects for later exclusion/analysis
audit.c$shared <- 0
audit.c$shared.ids <- NA
audit.c <- audit.c[,c(1:3,75:76,4:74)] #Re-arranging columns for convenience, moving up shared variables


#Manually noting villages that have components that do not exactly match (https://docs.google.com/document/d/1li_seH-D1vbxm7awX7Cuvlhya6nOQnfwonc6qZktwbU/edit?usp=sharing):
#1-6: share non-PAM project, which means non-independent outcomes
audit.c$shared <- ifelse(audit.c$village_id %in% 1:6 & (audit.c$Description_of_Project_Part=="Community camp" | audit.c$Description_of_Project_Part=="Construction of  community  camp (shared with the Parish)"), 1, 0)
audit.c$shared.ids <- ifelse(audit.c$village_id %in% 1:6 & (audit.c$Description_of_Project_Part=="Community camp" | audit.c$Description_of_Project_Part=="Construction of  community  camp (shared with the Parish)"),
                             "1,2,3,4,5,6",audit.c$shared.ids)

#7,10: problems w/ approved vs. not approved data, extra road
#(2/6): inquiry sent to Jeremiah about extra road --> the road is shared with village 10
audit.c$shared <- ifelse(audit.c$village_id %in% c(7,10) & (audit.c$Description_of_Project_Part=="Construction  of Nyabitsika/Kabumba road shared with Nkwenda  village" | audit.c$Description_of_Project_Part=="Opening of community feeder road (nyabisiika kabumba"), 1, audit.c$shared)
audit.c$shared.ids <- ifelse(audit.c$village_id %in% c(7,10) & (audit.c$Description_of_Project_Part=="Construction  of Nyabitsika/Kabumba road shared with Nkwenda  village" | audit.c$Description_of_Project_Part=="Opening of community feeder road (nyabisiika kabumba"),
                             "7,10",audit.c$shared.ids)

#10: PAM component duplicated
grab <- audit.c[audit.c$X_index==15,c("Picture_of_PAM_work_3","Description_of_PAM_work_3")]
audit.c[audit.c$X_index==14,c("Picture_of_PAM_work_2","Description_of_PAM_work_2")] <- grab #From X_index==15
audit.c[audit.c$X_index==14,"reasons_evidence_lacking.poor_no_labels"] <- 1 #From X_index==15
audit.c <- audit.c[!audit.c$X_index==15,] #removing X_index==15

grab <- audit.c[audit.c$X_index==16,c("Picture4","Description_of_Picture4")] #From X_index==16
audit.c[audit.c$X_index==14,c("Picture3","Description_of_Picture3")] <- grab #From X_index==16
audit.c <- audit.c[!audit.c$X_index==16,] #removing X_index==16

#11-12: shared health center, which means non-independent outcomes
audit.c$shared <- ifelse(audit.c$village_id==11 & audit.c$Description_of_Project_Part=="Health centre construction",1,audit.c$shared)
audit.c$shared <- ifelse(audit.c$village_id==12 & audit.c$Description_of_Project_Part=="Community health centre",1,audit.c$shared)
audit.c$shared.ids <- ifelse(audit.c$village_id %in% 11:12 & (audit.c$Description_of_Project_Part=="Health centre construction" | audit.c$Description_of_Project_Part=="Community health centre"),
                             "11,12",audit.c$shared.ids)

#12: PAM component duplicated with erroneous entry
audit.c <- audit.c[!(audit.c$village_id==12 & audit.c$Description_of_Project_Part=="None"),]

#13: one erroneous entry
audit.c <- audit.c[!(audit.c$village_id==13 & audit.c$Description_of_Project_Part=="No project part (just an error)"),]

#14: many PAM / goat duplicates
# duplicate entries are exact copies
audit.c <- audit.c[!audit.c$X_index==45,] #removing X_index==45
audit.c <- audit.c[!audit.c$X_index==46,] #removing X_index==46
audit.c <- audit.c[!audit.c$X_index==49,] #removing X_index==49
audit.c <- audit.c[!audit.c$X_index==50,] #removing X_index==50

#20-24: shared community camp, which means non-independent outcomes
audit.c$shared <- ifelse(audit.c$village_id %in% 20:24 & (audit.c$Description_of_Project_Part=="Construction of Buremba community tourist center" | audit.c$Description_of_Project_Part=="Community  camp" | audit.c$Description_of_Project_Part=="Construction of  Buremba community  tourism centre" | audit.c$Description_of_Project_Part=="Construction of Buremba community tourism center" | audit.c$Description_of_Project_Part=="Construction of bureau community tourism centre"), 1, audit.c$shared)
audit.c$shared.ids <- ifelse(audit.c$village_id %in% 20:24 & (audit.c$Description_of_Project_Part=="Construction of Buremba community tourist center" | audit.c$Description_of_Project_Part=="Community  camp" | audit.c$Description_of_Project_Part=="Construction of  Buremba community  tourism centre" | audit.c$Description_of_Project_Part=="Construction of Buremba community tourism center" | audit.c$Description_of_Project_Part=="Construction of bureau community tourism centre"),
                             "20,21,22,23,24",audit.c$shared.ids)

#26: village not part of sample frame (Bushegenyi)
audit.c <- audit.c[!(audit.c$village_id==26),]

#28-29: shared road, which means non-independent outcomes
audit.c$shared <- ifelse(audit.c$village_id==28 & audit.c$Description_of_Project_Part=="Opening karambi kyambeya,feeder road",1,audit.c$shared)
audit.c$shared <- ifelse(audit.c$village_id==29 & audit.c$Description_of_Project_Part=="Opening of karambi-kibingo community road",1,audit.c$shared)
audit.c$shared.ids <- ifelse(audit.c$village_id %in% 28:29 & (audit.c$Description_of_Project_Part=="Opening karambi kyambeya,feeder road" | audit.c$Description_of_Project_Part=="Opening of karambi-kibingo community road"),
                             "28,29",audit.c$shared.ids)

#33: duplicate PAM and goat entries
# duplicate uploads that are exact copies
audit.c <- audit.c[!(audit.c$village_id==33 & audit.c$X_parent_index==29),]

#41: duplicate PAM entries
grab <- audit.c[audit.c$X_index==38,c("Picture9","Description_of_Picture9","Picture10","Description_of_Picture10")] #From X_index==38
audit.c[audit.c$X_index==42,c("Picture9","Description_of_Picture9","Picture10","Description_of_Picture10")] <- grab #From X_index==38
audit.c[audit.c$X_index==42,c("dispersed_num_shown")] <- 10 #New value of PAM observations (from X_index==38)
audit.c <- audit.c[!audit.c$X_index==38,] #removing X_index==38

#42: duplicate PAM entries
# third component (X_index==84) is noted as an error in audit.v
audit.c <- audit.c[!audit.c$X_index==84,] #removing X_index==84

#48-49: potentially shared school, check with Jeremiah (2/5/18 Slack: confirmed by Jeremiah as shared)
audit.c$shared <- ifelse(audit.c$village_id %in% 48:49 & (audit.c$Description_of_Project_Part=="Construction of two class rooms" | audit.c$Description_of_Project_Part=="Completion of a two classroom block"), 1, audit.c$shared)
audit.c$shared.ids <- ifelse(audit.c$village_id %in% 48:49 & (audit.c$Description_of_Project_Part=="Construction of two class rooms" | audit.c$Description_of_Project_Part=="Completion of a two classroom block"),
                             "48,49",audit.c$shared.ids)

#57! audit of PAM component missing (Slack 2/5/18: the PAM component was never implemented in this village)
#Note: adding missing PAM component manually to downloaded file, per Jeremiah response
add57 <- c(57,0,
           "PAM",0,NA,
           "yes","approved","not_start_not_","dispersed","OK","OK","",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
           "","","","","","","","","","","","","","","","","","","","",
           "",0,"not_ver","not_delivered",0,0,0,0,0,0,0,1,
           "PAM for the whole of Ruhija Sub county was never implemented due to corruption at the subcounty. There was change of sub county chief after the 1st one being suspended and the new one only finding the procurement process finished. It is alleged that the former sub county chief never purchased any PAM item and kept the money",
           "","","","","","","OK",197,"Bwindi Accountability Endline LC/PMC physical audit (Final)",70,
           "Kabale","Ruhija","Kiyebe",1)
audit.c <- rbind(audit.c,add57)

#83: not measured correctly (doesn't matter because not in sample)
audit.c <- audit.c[!(audit.c$village_id==83),]

#87: sheep rearing duplicated
# removing unreliable audit that Jeremiah decided need to be redone
audit.c <- audit.c[!(audit.c$village_id==87 & audit.c$X_parent_index==92),]

#90: extra entry for PAM, which was not part of proposal
## Enumerator error, the extra entry doesn't correspond to a PAM project, just duplicates some of the info on the school
audit.c <- audit.c[!(audit.c$X_index==170),] #removing X_index==170

## Cross-tabulation of shared components
table(audit.c$treat,audit.c$shared)
prop.table(table(audit.c$treat,audit.c$shared),1)
chisq.test(table(audit.c$treat,audit.c$shared))



#Checking all "not_approved" entries against master RS list
## X_index == 2: shared road, not approved for Nkwenda, but was approved for Buhoma Central
## X_index == 71: Goat rearing was not approved
## X_index == 87: PAM was not approved
## X_index == 101: PAM was approved
## X_index == 123: Goat rearing was approved
## X_index == 166: Sheep rearing was approved
## X_index == 167: PAM was approved
## X_index == 180: Goat rearing was not approved
## X_index == 183: Sheep rearing was not approved, it was implemented instead of the approved goat rearing project
## X_index == 188: gravity water taps were approved, but never implemented. Money was instead used on the road.
## Fixing projects that were incorrectly labeled as not approved:
audit.c$project_part_approved <- ifelse(audit.c$X_index == 101 | audit.c$X_index == 123 | audit.c$X_index == 166 | audit.c$X_index == 167 | audit.c$X_index == 188, "approved", audit.c$project_part_approved)


###Compiling/Cleaning outcome data -----

#Note: there are many "dispersed" projects marked "not_dispersed", but not the other way
#Note: recoding "dispersed" projects marked incorrectly
#sub.c.nd <- subset(audit.c, shared==0 & dispersed_status=="not_dispersed")
#sub.c.d <- subset(audit.c, shared==0 & dispersed_status=="dispersed")
#see <- sub.c.nd[,c(1:3,69)] #convenience object to determine miscoded components
#Note: X=index=c(175,182) these are not_dispersed red chili components, OK.
nd2d <- c(127,132,184:187,189:191) #these are X_index values incorrect marked as "not_dispersed"
audit.c$dispersed_status <- ifelse(audit.c$X_index %in% nd2d, "dispersed", audit.c$dispersed_status)

sub.c.nd <- subset(audit.c, shared==0 & dispersed_status=="not_dispersed")
sub.c.d <- subset(audit.c, shared==0 & dispersed_status=="dispersed")

audit.c$Verifiable <- ifelse(audit.c$X_index==79, "somewhat_ver", audit.c$Verifiable) #Filling in missing value based on pictures and description

#Checking on missing data for "dispersed_num_shown"
#see <- audit.c[is.na(audit.c$dispersed_num_shown),] #convenience object for cleaning
#see2 <- audit.c[!is.na(audit.c$dispersed_num_shown),] #convenience object for cleaning

#Using presense of pictures to recode these values within dispersed / approved project subset
sub.c.d$pics <- 0
sub.c.d$pics <- ifelse(sub.c.d$Picture1=="",sub.c.d$pics,1)
sub.c.d$pics <- ifelse(sub.c.d$Picture2=="",sub.c.d$pics,2)
sub.c.d$pics <- ifelse(sub.c.d$Picture3=="",sub.c.d$pics,3)
sub.c.d$pics <- ifelse(sub.c.d$Picture4=="",sub.c.d$pics,4)
sub.c.d$pics <- ifelse(sub.c.d$Picture5=="",sub.c.d$pics,5)
sub.c.d$pics <- ifelse(sub.c.d$Picture6=="",sub.c.d$pics,6)
sub.c.d$pics <- ifelse(sub.c.d$Picture7=="",sub.c.d$pics,7)
sub.c.d$pics <- ifelse(sub.c.d$Picture8=="",sub.c.d$pics,8)
sub.c.d$pics <- ifelse(sub.c.d$Picture9=="",sub.c.d$pics,9)
sub.c.d$pics <- ifelse(sub.c.d$Picture10=="",sub.c.d$pics,10)
table(sub.c.d$treat,sub.c.d$pics)

table(sub.c.d$pics, sub.c.d$dispersed_num_shown) #There is a small amount of mismatch between number of pictures and number reported by enumerators
check <- subset(sub.c.d, pics!=dispersed_num_shown)

#X_index values checked in object "check"
#11: mismatch due to duplicate components merged
sub.c.d$pics[sub.c.d$X_index==111] <- 0 #The project shown is not the project component listed (it is an Irish potatoe garden planted w/ money from RS)
sub.c.d$pics[sub.c.d$X_index==141] <- 1 #Only one set of PAM items is captures in photographs; other photographs are PAM work completed, which should be captured separately; list of items is not an item observed
#151: "pics" value is correct; enumerator entered wrong value for "dispersed_num_shown"
#180: "pics" value is correct; enumerator entered wrong value for "dispersed_num_shown"

#Note: non-missing data for "dispersed_num_shown" have been superceded by the "pics" variable that is cleaned for actual number of pictures recorded

audit.c$pics <- NA
for (i in 1:nrow(audit.c)){
  sub <- subset(sub.c.d, X_index==audit.c$X_index[i])
  
  if (nrow(sub) > 0){
    audit.c$pics[i] <- sub$pics[1]
  }
} #Checked: merged correctly


###Merging RI object into audit.c for ease of use -----

p2.ri <- p2.ri[!is.na(p2.ri$village.id) & (p2.ri$village.id %in% audit.c$village_id),]
names(p2.ri)[5] <- "treat.ph1"

audit.c.ri <- merge(audit.c, p2.ri, by.x="village_id", by.y="village.id", all.x=TRUE, all.y=FALSE)
#Checked: number of rows does not change
#To Do: check discrepancy between sc.block.x and sc.block.y

audit.c.ri$t.sub <- NA

## Survey Data Cleaning ----

###Adding village ids (per sheet of projects used during auditing and field enumeration)
#To Do: use "village.sample" to match and add village.id, which is going to be the merge variable across all datasets

survey$village.id <- NA
survey <- survey[,c(1:4,79,5:78)] #Reording columns to put "village.id" next to "Village_Name"

#Loop for unique matches
for (i in 1:nrow(survey)){
  survey$village.id[i] <- ifelse((sum(survey$Village_Name[i]==village.sample$village.name)==1 & survey$Village_Name[i] %in% village.sample.no.dup$village.name),
                                  village.sample.no.dup$village.id[survey$Village_Name[i]==village.sample.no.dup$village.name],
                                  NA)
}

table(survey$village.id, useNA = "always") #384 entries remaining to be cleaned

survey$start <- as.character(survey$start)
substr(survey$start, 11, 11) <- " "
survey$survey.time <- strptime(survey$start, "%F %T")

#Loop for interpolated enumerator matches
for (i in 1:nrow(survey)){
  if (is.na(survey$village.id[i])){
    sub.earlier <- subset(survey, Enumerator_name==survey$Enumerator_name[i] & !is.na(village.id) & survey$survey.time<survey$survey.time[i])
    sub.earlier <- sub.earlier[rev(order(sub.earlier$survey.time)),]
    sub.later <- subset(survey, Enumerator_name==survey$Enumerator_name[i]  & !is.na(village.id) & survey$survey.time>survey$survey.time[i])
    sub.later <- sub.later[order(sub.later$survey.time),]
    
    survey$village.id[i] <- ifelse(sub.earlier$village.id[1]==sub.later$village.id[1], sub.earlier$village.id[1], NA)
  }
}

table(survey$village.id, useNA = "always") #365 entries remaining to be cleaned

unique(survey$Village_Name[is.na(survey$village.id)]) #These are all the village names that need to be cleaned manually

#Filling in the other village.id's manually using fuzzy matches with village.sample
survey$village.id <- ifelse(survey$Village_Name=="Rushaga", 86, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Ruboona", 12, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Nyambare", 77, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Nyakango kashekyera", 60, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Nyakabungo kashekyera", 60, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Nyakabungo bushara", 36, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Nyakabungo bushura", 36, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Nyakabungo ( Rubibwa )", 40, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Ndengo", 74, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Ndego", 74, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Mburameizi", 53, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Kyteitokwa", 64, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Kyoto rubanda", 59, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Kyogo rubanda", 59, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Kyogo ( mpungu )", 27, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Kitojo", 67, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Kigaga", 25, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Kanyashogi", 17, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Kanyabuhama", 46, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Ishaya(Mucusye )", 49, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Ishaya (Mucusye )", 49, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Ishaya (Mucusye)", 49, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Ishaya ( Mucusye )", 49, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Buhomo", 10, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Buhoma", 10, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Bugolo", 48, survey$village.id)
#MB: it is always dangerous in code to use hanging indices as below, which will not work if something accidently gets sorted somewhere else.
survey$village.id[74] <- 12
survey$village.id[546] <- 40
survey$village.id[548] <- 40
survey$village.id[555] <- 40
survey$village.id[637] <- 36
survey$village.id[641] <- 36
survey$village.id[759] <- 27
survey$village.id[996] <- 59
survey$village.id[1169] <- 60
survey$village.id[1290] <- 74

##Kitahurira village surveys done by Wilber are for Kitahurira (Kashasha), because Wilber was primarily auditing in the Kabale district. 
##Kitahurira village surveys done by Kenneth are then Kitahurira (Hakikome). Filling in the village IDs:
for (i in 1468:1489) {
  survey$village.id[i] <- 70
}

for (i in 796:805) {
  survey$village.id[i] <- 20
}
survey$village.id[830] <- 20
survey$village.id[838] <- 20
survey$village.id[845] <- 20
survey$village.id[847] <- 20
survey$village.id[848] <- 20
survey$village.id[849] <- 20
survey$village.id[852] <- 20
survey$village.id[857] <- 20
survey$village.id[912] <- 20
survey$village.id[913] <- 20

table(survey$village.id, useNA = "always") #Confirmed: no entries without village id values

#Adding treatment assignment for accountability phase
p2.assigned <- p2.assigned[!is.na(p2.assigned$village.id),]
for (i in 1:nrow(survey)){
  survey$treat[i] <- ifelse(length(p2.assigned$treat2[p2.assigned$village.id==survey$village.id[i]])>0,
                             p2.assigned$treat2[p2.assigned$village.id==survey$village.id[i]], NA)
}
#This brings in the treatment assigned for the accountability phase, which should allow descriptive statistics to be computed

### Fill in incorrect values for age with village mean
survey$age_cleaned <- survey$age
survey$age_cleaned[275] <- NA
survey$age_cleaned[275] <- mean(survey[survey$village.id==46,"age_cleaned"], na.rm=TRUE)
survey$age_cleaned[1423] <- NA
survey$age_cleaned[1423] <- mean(survey[survey$village.id==80,"age_cleaned"], na.rm=TRUE)

## Compiling survey outcome data ----

# Create columns for the linear scales of each variable of interest
survey$subcounty <- NA
survey$listed_beneficiary_this_year_scale <- NA
survey$expect_benefit_this_year_scale <- NA
survey$satisfied_with_planning_scale <- NA
survey$satisfied_with_implementation_scale <- NA
survey$satisfied_with_BNP_management_scale <- NA
survey$satisfied_with_RS_scale <- NA
survey$important_to_conserve_scale <- NA
survey$benefits_valuable_scale <- NA
survey$sc.block <- NA
#To Do: automate the number of observations in the tables

# Create linear scales for each output variable of interest
survey$pic_shown <- NA
survey$willing_binary <- NA
survey$project_implemented_as_planned_binary <- NA

for (i in 1:nrow(survey))
{
  survey$pic_shown[i] <- ifelse(survey$pic1[i] == "" & survey$pic2[i] == "" & survey$pic3[i] == "", 0, 1)
  survey$willing_binary[i] <- ifelse(survey$The_last_question_is_more_of_a[i] == "willing", 1, 0)
  survey$project_implemented_as_planned_binary[i] <- ifelse(survey$project_implemented_as_planned[i] == "yes", 1, 0)
}

for (i in 1:nrow(survey))
{
  for (j in 1:nrow(p2.assigned))
  {
    survey$subcounty[i] <- ifelse(survey$village.id[i] == p2.assigned$village.id[j], p2.assigned$subcounty.orig[j], survey$subcounty[i])
    survey$sc.block[i] <- ifelse(survey$village.id[i] == p2.assigned$village.id[j], p2.assigned$sc.block[j], survey$sc.block[i])
  }
  
  survey$listed_beneficiary_this_year_scale[i] <- ifelse(survey$listed_beneficiary_this_year[i] == "yes", 1, 0)
  
  survey$expect_benefit_this_year_scale[i] <- ifelse(survey$expect_benefit_this_year[i] == "yes", 1, ifelse(survey$expect_benefit_this_year[i] == "no", 0, NA))
  
  survey$satisfied_with_planning_scale[i] <- ifelse(survey$satisfied_with_planning[i] == "very_dissatisf", 0, 
                                                    ifelse(survey$satisfied_with_planning[i] == "somewhat_dissa", 1,
                                                           ifelse(survey$satisfied_with_planning[i] == "somewhat_satis", 2,
                                                                  ifelse(survey$satisfied_with_planning[i] == "very_satisfied", 3, NA))))
  
  survey$satisfied_with_implementation_scale[i] <- ifelse(survey$satisfied_with_implementation[i] == "very_dissatisf", 0, 
                                                    ifelse(survey$satisfied_with_implementation[i] == "somewhat_dissa", 1,
                                                           ifelse(survey$satisfied_with_implementation[i] == "somewhat_satis", 2,
                                                                  ifelse(survey$satisfied_with_implementation[i] == "very_satisfied", 3, NA))))
  
  survey$satisfied_with_BNP_management_scale[i] <- ifelse(survey$satisfied_with_BNP_management[i] == "very_dissatisf", 0, 
                                                    ifelse(survey$satisfied_with_BNP_management[i] == "somewhat_dissa", 1,
                                                           ifelse(survey$satisfied_with_BNP_management[i] == "neutral", 2,
                                                           ifelse(survey$satisfied_with_BNP_management[i] == "somewhat_satis", 3,
                                                                  ifelse(survey$satisfied_with_BNP_management[i] == "very_satisfied", 4, NA)))))
  
  survey$satisfied_with_RS_scale[i] <- ifelse(survey$satisfied_with_RS[i] == "very_dissatisf", 0, 
                                                          ifelse(survey$satisfied_with_RS[i] == "somewhat_dissa", 1,
                                                                 ifelse(survey$satisfied_with_RS[i] == "neutral", 2,
                                                                        ifelse(survey$satisfied_with_RS[i] == "somewhat_satis", 3,
                                                                               ifelse(survey$satisfied_with_RS[i] == "very_satisfied", 4, NA)))))
  
  survey$important_to_conserve_scale[i] <- ifelse(survey$important_to_conserve[i] == "not_very_impor", 0, 
                                              ifelse(survey$important_to_conserve[i] == "somewhat_impor", 1,
                                                     ifelse(survey$important_to_conserve[i] == "very_important", 2, NA)))
  
  survey$benefits_valuable_scale[i] <- ifelse(survey$benefits_valuable[i] == "not_at_all_val", 0, 
                                                  ifelse(survey$benefits_valuable[i] == "not_very_valua", 1,
                                                         ifelse(survey$benefits_valuable[i] == "somewhat_valua", 2, 
                                                                ifelse(survey$benefits_valuable[i] == "very_valuable", 3, NA))))
}

survey$chose_again_num <- ifelse(survey$chose_the_same_project_again=="yes",1,0) #not in R&P replication
survey <- subset(survey, !is.na(survey$treat))

p2.ri <- p2.ri[!is.na(p2.ri$village.id) & (p2.ri$village.id %in% survey$village.id),]
names(p2.ri)[5] <- "treat.ph1"
p2.ri <- p2.ri[-c(50,84,51),]

survey.ri <- merge(x=survey, y=p2.ri, by.x="village.id", by.y="village.id", all.x=TRUE, all.y=FALSE)
survey.ri <- survey.ri[!is.na(survey.ri$treat), ]
survey.ri$t.sub <- NA

```

```{r fig-attendance, fig.width=6, fig.height=6}
recruited.subjects$location.id <- paste(recruited.subjects$Subcounty,recruited.subjects$updated.village,sep=".")
lc1.planning$location.id <- paste(lc1.planning$Sub.County,lc1.planning$Village,sep=".")

loc.check <- lc1.planning$location.id
# loc.check %in% recruited.subjects$location.id
see <- lc1.planning[!(loc.check %in% recruited.subjects$location.id),] #LC1 data without matches
#Kirundo.Higabiro: no apparent match with sampling frame
#Kayonza.Musheija
lc1.planning$location.id <- ifelse(lc1.planning$location.id=="Kayonza.Musheija", "Kayonza.Mushija", lc1.planning$location.id)
#Mpungu.Hakikome (Kitahurira): Parish/Village switched, see below
#Kirima.Mutojo/Bugandaro: combined meeting for unexplained reasons, excluding from analysis
#Ruhija.Kyogo (Rubanda): no apparent match with sampling frame

# unique(recruited.subjects$location.id) %in% loc.check
# unique(recruited.subjects$location.id)[!(unique(recruited.subjects$location.id) %in% loc.check)]

recruited.subjects$location.id <- ifelse(recruited.subjects$location.id=="Mpungu.Kitahurira (Hakikome)", "Mpungu.Hakikome (Kitahurira)", recruited.subjects$location.id)

for (i in 1:nrow(lc1.planning)){
  sub <- subset(recruited.subjects, location.id==lc1.planning$location.id[i])
  lc1.planning$treat[i] <- sub$treat[1]
}

lc1.planning$Reason.Attendance.Data.Missing[c(5,19,25,35,47,48,55,75,76,81,85,94)] <- "Subcounty project"
#Note: this is cleaning the column

# lc1.planning.final <- subset(lc1.planning, !is.na(treat))
# sum(!is.na(lc1.planning.final$Attendance))
# 
# lc1.planning.final$attr <- ifelse(is.na(lc1.planning.final$Attendance), 1, 0)
# 
# attr.mod <- lm(attr ~ treat, data=lc1.planning.final)
# summary(attr.mod)
# 
# plan.nonSC <- subset(lc1.planning.final, Reason.Attendance.Data.Missing!="CCR could not find data")
# attr.mod <- lm(attr ~ treat, data=plan.nonSC)
# summary(attr.mod)
# 
# table(lc1.planning.final$Reason.Attendance.Data.Missing)

# par(default.par)
# par(mar=c(2.1,4.1,1.1,1.1))
# boxplot(Attendance ~ treat, data= lc1.planning, names=c("Control","Treated"),range=0, ylab="Number of Residents Attending Planning Meeting")
# means <- tapply(lc1.planning$Attendance,lc1.planning$treat, function(x) mean(x,na.rm=T))
# points(means,col="red",pch=18,cex=2)

labels <- c('0' = "Control", '1' = "Treatment")

ggplot(data=lc1.planning[!is.na(lc1.planning$treat) & !is.na(lc1.planning$Attendance),], 
       aes(x=Attendance, group=treat, fill=treat)) +
    geom_histogram(binwidth=10) +
    theme_minimal() +
    facet_grid(treat ~ ., , labeller=labeller(treat = labels)) +
    theme(
      legend.position="none",
      panel.spacing = unit(0.1, "lines"),
      axis.ticks.x=element_blank(),
      strip.background = element_rect(colour="blue", fill="white"),
      strip.text.y = element_text(size=12, face="bold")) +
  scale_x_continuous(breaks=c(seq(from=10,to=150, by=10)),
                     minor_breaks = seq(0, 150, 10)) +
  xlab("Attendance of Village Residents at Planning Meeting")

```

```{r fig-conditional-reached, fig.width=6, fig.height=9, warning=FALSE}
#Modified from Fig. 9 in WD article
root <- "~/Google Drive/Bwindi project/Code/WD_Replication"
#Note: all code and data files should be placed in a single directory and referenced in the line above
#Note: download from: https://doi.org/10.7910/DVN/15YF0R
#Note: change the file path "root" to the local working directory
setwd(root)
source('outcome_var_mean_imp_setup.R')

setwd(here())
source('inter.binning.90_v2.R')

M.ri <- merge(M, sbj_RI, by="Subject_ID_number", all.x=TRUE)

sat.bin <- inter.binning.90(Y="e.satisfaction.b2", D="treat", X="subjects.per.village", Z="b.satisfaction.b2", FE="subcounty.blocking", data=M, 
                            Ylabel = "Outcome", Dlabel = "Treatment", Xlabel="Number of Subjects Per Village", main="Satisfied with Park Management", nbins=4, na.rm=TRUE,
                            ylim=c(-0.72,0.85))
park.sat.bin <- sat.bin$graph

info.bin <- inter.binning.90(Y="e.knowledge.b7", D="treat", X="subjects.per.village", Z="b.knowledge.b7", FE="subcounty.blocking", data=M, 
                             Ylabel = "Outcome", Dlabel = "Treatment", Xlabel="Number of Subjects Per Village", main="Satisfied with Information from UWA about RS", nbins=4, na.rm=TRUE)
info.from.UWA.bin <- info.bin$graph

import.bin <- inter.binning.90(Y="e.conservation.b6", D="treat", X="subjects.per.village", Z="b.conservation.b6", FE="subcounty.blocking", data=M, 
                               Ylabel = "Outcome", Dlabel = "Treatment", Xlabel="Number of Subjects Per Village", main="Importance of Protecting Bwindi", nbins=4, na.rm=TRUE)
con.support.bin <- import.bin$graph

partic.bin <- inter.binning.90(Y="e.participate.e4", D="treat", X="subjects.per.village", FE="subcounty.blocking", data=M, 
                               Ylabel = "Outcome", Dlabel = "Treatment", Xlabel="Number of Subjects Per Village", main="Participated in RS Meeting (past month)", nbins=4, na.rm=TRUE)
part.bin <- partic.bin$graph

multiplot(part.bin,park.sat.bin,info.from.UWA.bin,con.support.bin, cols=1) #Modified from WD Fig. 9
```

```{r tab-participation, results="asis"}
#M is the master object for the participation phase

#e.participate.b11 #Opportunities to communicate with UWA
#e.participate.b10 #Opportunities to participate in planning
#e.participate.e4 #participated in planning meeting

# test computations
# diff_summ(data=M,outcome=e.participate.b11,treat=treat,sims=500)
# split_summ(data=M,outcome=e.participate.b11,treat=treat,sims=500,split.var = E_subject_s_gender)

M <- M %>% mutate(income.class=case_when(E_Subject_s_income == "20_000_shillin" | E_Subject_s_income == "20_000___100_0" ~ "impoverished",
                                         E_Subject_s_income == "100_000___200_" | E_Subject_s_income == "200_000___500_" | E_Subject_s_income == "500_000___1_mi" |
                                         E_Subject_s_income == "1_million_shil" ~ "not impoverished",
                                         E_Subject_s_income == "refused_to_ans" ~ NA_character_))

M <- M %>% mutate(lit.class=case_when(E_Can_you_read_001 == "yes__i_can_rea" ~ "literate",
                                      E_Can_you_read_001 == "yes__i_can_som" | E_Can_you_read_001 == "no__but_i_have" | 
                                      E_Can_you_read_001 == "no__i_cannot_r" ~ "not fully literate"))

M$e.participate.e4 <- as.numeric(M$e.participate.e4)

set.seed(101)
b11.pooled <- diff_summ(data=M,outcome=e.participate.b11,treat=treat,sims=500)
b11.gender <- split_summ(data=M,outcome=e.participate.b11,treat=treat,sims=500,split.var = E_subject_s_gender)
b11.income <- split_summ(data=M,outcome=e.participate.b11,treat=treat,sims=500,split.var = income.class)
b11.lit <- split_summ(data=M,outcome=e.participate.b11,treat=treat,sims=500,split.var = lit.class)
b11 <- bind_rows(b11.pooled,b11.gender,b11.income,b11.lit)

b10.pooled <- diff_summ(data=M,outcome=e.participate.b10,treat=treat,sims=500)
b10.gender <- split_summ(data=M,outcome=e.participate.b10,treat=treat,sims=500,split.var = E_subject_s_gender)
b10.income <- split_summ(data=M,outcome=e.participate.b10,treat=treat,sims=500,split.var = income.class)
b10.lit <- split_summ(data=M,outcome=e.participate.b10,treat=treat,sims=500,split.var = lit.class)
b10 <- bind_rows(b10.pooled,b10.gender,b10.income,b10.lit)

e4.pooled <- diff_summ(data=M,outcome=e.participate.e4,treat=treat,sims=500)
e4.gender <- split_summ(data=M,outcome=e.participate.e4,treat=treat,sims=500,split.var = E_subject_s_gender)
e4.income <- split_summ(data=M,outcome=e.participate.e4,treat=treat,sims=500,split.var = income.class)
e4.lit <- split_summ(data=M,outcome=e.participate.e4,treat=treat,sims=500,split.var = lit.class)
e4 <- bind_rows(e4.pooled,e4.gender,e4.income,e4.lit)

dta.tab <- bind_cols(b11,b10,e4) %>% dplyr::select(status:prt,prt1,prt2)

names(dta.tab) <- c("Grouping","Treatment","Opportunities to Communicate","Opportunities to Participate","Participated in Planning")
print(xtable(dta.tab,
             caption="\\textbf{Treatment Effects on Participation Outcomes by Subgroupings}",
             label = "tab-participation"),
      hline.after=c(0,3,9,15,21), 
      include.rownames=FALSE,
      add.to.row = list(pos=list(6,12,18,21), command=c(rep("\\hdashline \n",3),
                                                        "\\hline \\multicolumn{5}{p{7.5in}}{\\footnotesize \\textbf{Outcome Variables: } \\emph{Opportunities to Communicate} is a survey response on a Likert scale of 1 (Very Dissatisfied) to 5 (Very Satisfied) on perceived opportunities to communicate with UWA about revenue sharing. \\emph{Opportunities to Participate} is a survey response on a Likert scale of 1 (Disagree) to 4 (Agree) on agreement that people like the respondent have opportunities to participate in revenue sharing. \\emph{Participated in Planning} is a binary survey response on whether the individual participated in planning meetings. \\textbf{Grouping Variables: } \\emph{Pooled} are all respondents by experimental condition. \\emph{Gender} is the self-reported gender of respondents. \\emph{Impoverished} indicates all individuals in households with an average monthly income of less than 100,000 shillings. \\emph{Literacy} is defined by self-reported full literacy. \\textbf{Standard Errors: } All standard errors are bootstrapped within experimental and grouping conditions. The standard errors on the treatment effect (diff) are the average differences between the bootstrapped values within each experimental condition over many draws.} \\\\")),
      comment=FALSE,
      floating = TRUE, floating.environment = "sidewaystable",
      caption.placement="top",
      booktabs=TRUE)

```

```{r tab-accountability, results="asis", warning=FALSE}

survey$survey.time <- as.POSIXct(survey$survey.time)
survey <- survey %>% filter(!is.na(treat))

#grouping classes
survey <- survey %>% mutate(income.class=case_when(approximate_monthly_income == "less_20" | approximate_monthly_income == "20-100" ~ "impoverished",
                                                   approximate_monthly_income == "100-200" | approximate_monthly_income == "200-500" | 
                                                   approximate_monthly_income == "500-1000" ~ "not impoverished"))

survey <- survey %>% mutate(lit.class=if_else(literacy == "read_well", "literate","not fully literate"))

#outcomes
# table(survey$project_implemented_as_planned_binary, useNA="always")
# table(survey$satisfied_with_implementation_scale,useNA = "always")
# table(survey$chose_again_num,useNA = "always")
survey <- survey %>% mutate(corruption=if_else(corruption_in_RS=="no",0,1))
# table(survey$corruption,useNA = "always")
# table(survey$pic_shown, useNA = "always")

#table objects
set.seed(101)
impl.pooled <- diff_summ(data=survey,outcome=project_implemented_as_planned_binary,treat=treat,sims=500)
impl.gender <- split_summ(data=survey,outcome=project_implemented_as_planned_binary,treat=treat,sims=500, split.var = gender)
impl.income <- split_summ(data=survey,outcome=project_implemented_as_planned_binary,treat=treat,sims=500, split.var = income.class)
impl.lit <- split_summ(data=survey,outcome=project_implemented_as_planned_binary,treat=treat,sims=500, split.var = lit.class)
impl <- bind_rows(impl.pooled,impl.gender,impl.income,impl.lit)

sat.pooled <- diff_summ(data=survey,outcome=satisfied_with_implementation_scale,treat=treat,sims=500)
sat.gender <- split_summ(data=survey,outcome=satisfied_with_implementation_scale,treat=treat,sims=500, split.var = gender)
sat.income <- split_summ(data=survey,outcome=satisfied_with_implementation_scale,treat=treat,sims=500, split.var = income.class)
sat.lit <- split_summ(data=survey,outcome=satisfied_with_implementation_scale,treat=treat,sims=500, split.var = lit.class)
sat <- bind_rows(sat.pooled,sat.gender,sat.income,sat.lit)

chose.pooled <- diff_summ(data=survey,outcome=chose_again_num,treat=treat,sims=500)
chose.gender <- split_summ(data=survey,outcome=chose_again_num,treat=treat,sims=500, split.var = gender)
chose.income <- split_summ(data=survey,outcome=chose_again_num,treat=treat,sims=500, split.var = income.class)
chose.lit <- split_summ(data=survey,outcome=chose_again_num,treat=treat,sims=500, split.var = lit.class)
chose <- bind_rows(chose.pooled,chose.gender,chose.income,chose.lit)

corr.pooled <- diff_summ(data=survey,outcome=corruption,treat=treat,sims=500)
corr.gender <- split_summ(data=survey,outcome=corruption,treat=treat,sims=500, split.var = gender)
corr.income <- split_summ(data=survey,outcome=corruption,treat=treat,sims=500, split.var = income.class)
corr.lit <- split_summ(data=survey,outcome=corruption,treat=treat,sims=500, split.var = lit.class)
corr <- bind_rows(corr.pooled,corr.gender,corr.income,corr.lit)

pic.pooled <- diff_summ(data=survey,outcome=pic_shown,treat=treat,sims=500)
pic.gender <- split_summ(data=survey,outcome=pic_shown,treat=treat,sims=500, split.var = gender)
pic.income <- split_summ(data=survey,outcome=pic_shown,treat=treat,sims=500, split.var = income.class)
pic.lit <- split_summ(data=survey,outcome=pic_shown,treat=treat,sims=500, split.var = lit.class)
pic <- bind_rows(pic.pooled,pic.gender,pic.income,pic.lit)

dta.tab2 <- bind_cols(impl,sat,chose,corr,pic) %>% dplyr::select(status:prt,prt1,prt2,prt3,prt4)

names(dta.tab2) <- c("Grouping","Treatment","Implemented","Satisfied Implementation","Chose Again","Corruption","Evidence Shown")

print(xtable(dta.tab2,
             caption="\\textbf{Treatment Effects on Accountability Outcomes by Subgroupings}",
             label = "tab-accountability"),
      hline.after=c(0,3,9,15,21), 
      include.rownames=FALSE,
      add.to.row = list(pos=list(6,12,18,21), command=c(rep("\\hdashline \n",3),
                                                        "\\hline \\multicolumn{7}{p{9.5in}}{\\footnotesize \\textbf{Outcome Variables: } \\emph{Implemented} is a survey response that indicates whether the respondent observed any evidence of the approve project being implemented, regardless of its quality or completeness. \\emph{Satisfied Implementation} is a survey response on a Likert scale of 0 (Very Dissatisfied) to 3 (Very Satisfied) on the level of satisfaction with the implementation of revenue sharing. \\emph{Chosen Again} is a binary survey response on whether the individual would choose the same project again given the experience with planning and implementation. \\emph{Corruption} is a binary survey response on whether the individual has seen evidence of corruption in their village's previous revenue sharing projects. \\emph{Evidence Shown} is a binary indicator of whether the respondent was willing and able to locate physical evidence of project implementation to be photographed by our research team. \\textbf{Grouping Variables: } \\emph{Pooled} are all respondents by experimental condition. \\emph{Gender} is the self-reported gender of respondents. \\emph{Impoverished} indicates all individuals in households with an average monthly income of less than 100,000 shillings. \\emph{Literacy} is defined by self-reported full literacy. \\textbf{Standard Errors: } All standard errors are bootstrapped within experimental and grouping conditions. The standard errors on the treatment effect (diff) are the average differences between the bootstrapped values within each experimental condition over many draws.} \\\\")),
      comment=FALSE,
      floating = TRUE, floating.environment = "sidewaystable",
      caption.placement="top",
      booktabs=TRUE, size="small")

```

```{r analysis-audit, cache=TRUE, message=FALSE, include=FALSE}
#This figure is based on analysis code in Bwindi_Accountability.R, section: "Randomization inference"

sub.c <- subset(audit.c.ri, shared==0)
sub.c$finished <- ifelse(sub.c$implementation_status=="finished", 1, 0)
sub.c$approved.imp <- ifelse(sub.c$project_part_approved=="approved" & sub.c$implementation_status!="not_start_exp" & sub.c$implementation_status!="not_start_not_", 1, 0)
sub.c$complete <- ifelse(sub.c$proportion_observable=="complete" | sub.c$proportion_observable=="mostly_comp", 1, 0)
sub.c$ver <- ifelse(sub.c$Verifiable=="verifiable", 1, 0)
sub.c$somewhat.ver <- ifelse(sub.c$Verifiable=="somewhat_ver", 1, 0)
sub.c$not.ver <- ifelse(sub.c$Verifiable=="not_ver", 1, 0)

sims <- 10000

##Difference-in-means
dm.finished <- ri.diff(ri.obj = sub.c, outcome.var = "finished", treat.var = "treat", treat.ri.var = "t.sub", info.cols = 95, sims=sims)
dm.approved <- ri.diff(ri.obj = sub.c, outcome.var = "approved.imp", treat.var = "treat", treat.ri.var = "t.sub", info.cols = 95, sims=sims)
dm.complete.con <- ri.diff(ri.obj = sub.c[sub.c$dispersed_status=="not_dispersed",], outcome.var = "complete", treat.var = "treat", treat.ri.var = "t.sub", info.cols = 95, sims=sims)
dm.pics <- ri.diff(ri.obj = sub.c[sub.c$dispersed_status=="dispersed",], outcome.var = "pics", treat.var = "treat", treat.ri.var = "t.sub", info.cols = 95, sims=sims) 
dm.ver <- ri.diff(ri.obj = sub.c, outcome.var = "ver", treat.var = "treat", treat.ri.var = "t.sub", info.cols = 95, sims=sims)
dm.somewhat.ver <- ri.diff(ri.obj = sub.c, outcome.var = "somewhat.ver", treat.var = "treat", treat.ri.var = "t.sub", info.cols = 95, sims=sims)

diff.null.se <- c(dm.finished$se.null, dm.approved$se.null, dm.complete.con$se.null, dm.pics$se.null, dm.ver$se.null, dm.somewhat.ver$se.null)
p <- c(dm.finished$p.one.way.greater, dm.approved$p.one.way.greater, dm.complete.con$p.one.way.greater, dm.pics$p.one.way.greater, dm.ver$p.one.way.greater, dm.somewhat.ver$p.one.way.greater)

##FELM, with village clustering
felm.finished <- felm.ri(finished ~ treat | sc.block.y | 0 | village_id, dta=sub.c, treat.var = "treat", rand.ob = sub.c, rand.ob.info.cols = 95, sims=sims)
felm.approved <- felm.ri(approved.imp ~ treat | sc.block.y | 0 | village_id, dta=sub.c, treat.var = "treat", rand.ob = sub.c, rand.ob.info.cols = 95, sims=sims)
# felm.complete.con <- felm.ri(complete ~ treat | sc.block.y | 0 | village_id, dta=sub.c[sub.c$dispersed_status=="not_dispersed",], treat.var = "treat", 
#                              rand.ob = sub.c[sub.c$dispersed_status=="not_dispersed",], rand.ob.info.cols = 95, sims=sims)
felm.pics <- felm.ri(pics ~ treat | sc.block.y | 0 | village_id, dta=sub.c[sub.c$dispersed_status=="dispersed",], treat.var = "treat", 
                     rand.ob = sub.c[sub.c$dispersed_status=="dispersed",], rand.ob.info.cols = 95, sims=sims)
felm.ver <- felm.ri(ver ~ treat | sc.block.y | 0 | village_id, dta=sub.c, treat.var = "treat", rand.ob = sub.c, rand.ob.info.cols = 95, sims=sims)
felm.somewhat.ver <- felm.ri(somewhat.ver ~ treat | sc.block.y | 0 | village_id, dta=sub.c, treat.var = "treat", rand.ob = sub.c, rand.ob.info.cols = 95, sims=sims)

felm.ate <- c(felm.finished$ate, felm.approved$ate, -999, felm.pics$ate, felm.ver$ate, felm.somewhat.ver$ate)
felm.null.se <- c(felm.finished$se.null, felm.approved$se.null, -999, felm.pics$se.null, felm.ver$se.null, felm.somewhat.ver$se.null)
felm.p <- c(felm.finished$p.one.way.greater, felm.approved$p.one.way.greater, -999, felm.pics$p.one.way.greater, felm.ver$p.one.way.greater, felm.somewhat.ver$p.one.way.greater)
felm.n <- c(felm.finished$N, felm.approved$N, -999, felm.pics$N, felm.ver$N, felm.somewhat.ver$N)

```

```{r fig-audit, fig.width=11, fig.height=7, fig.fullwidth=TRUE}
#This figure is based on some unused code in Bwindi_Accountability.R, section: "Plotting Basic Outcomes by treatment condition"

###Binary outcome for implementation finished / not ----
#Note: this is the implementation status only, not the completeness of the project
#Question: According to the LC1 or the member of the project committee, what is the implementation status of this project part?
#Enumerator Instructions: This question does not record the completeness of the project, it just records whether implementation is done or whether they still expect more activities.

sub.c$finished <- ifelse(sub.c$implementation_status=="finished", 1, 0)
f <- data.frame(prop.table(table(sub.c$treat,sub.c$finished),1))[3:4,c(1,3)]
names(f) <- c("treatment","prop.finished")
f$treatment <- ifelse(f$treatment==0,"control","treated")

#Computing standard errors by bootstrapping
sims <- 5000
ctl.means <- rep(NA, sims)
trt.means <- rep(NA, sims)
set.seed(201)
for (i in 1:sims){
  ctl.means[i] <- mean(sample(sub.c$finished[sub.c$treat==0], replace=T))
  trt.means[i] <- mean(sample(sub.c$finished[sub.c$treat==1], replace=T))
}
f$se <- c(sd(ctl.means),sd(trt.means))

#Plotting
se.bars <- aes(ymax = f$prop.finished + f$se, ymin = f$prop.finished - f$se)

f.plot <- ggplot(data=f, aes(x=treatment, y=prop.finished, fill=treatment)) + geom_bar(stat="identity") + geom_errorbar(se.bars, width=0.3) + 
           ylab("Proportion w/ Implementation Finished") + xlab("Treatment Status") + scale_fill_manual(values=c('#999999','#E69F00')) +
           theme(legend.position = "none", plot.title = element_text(face="bold")) + 
           annotate("text", Inf, Inf, 
                    label = paste("ATE = ",round(felm.ate[1],2)," (",round(felm.null.se[1],3),"), p = ",round(felm.p[1],2),sep = ""), hjust = 1, vjust = 1) +
           ggtitle("Implementation Finished") 
#f.plot #Note: could represent that treatment villages are able to keep projects "open" for longer


###Proportion of projects approved by UWA -----

sub.c$approved.imp <- ifelse(sub.c$project_part_approved=="approved" & sub.c$implementation_status!="not_start_exp" & sub.c$implementation_status!="not_start_not_", 1, 0)

a <- data.frame(prop.table(table(sub.c$treat,sub.c$approved.imp),1))[3:4,c(1,3)]
names(a) <- c("treatment","prop.approved")
a$treatment <- ifelse(a$treatment==0,"control","treated")

#Computing standard errors by bootstrapping
sims <- 5000
ctl.means <- rep(NA, sims)
trt.means <- rep(NA, sims)
set.seed(201)
for (i in 1:sims){
  ctl.means[i] <- mean(sample(sub.c$approved.imp[sub.c$treat==0], replace=T))
  trt.means[i] <- mean(sample(sub.c$approved.imp[sub.c$treat==1], replace=T))
}
a$se <- c(sd(ctl.means),sd(trt.means))

#Plotting
se.bars <- aes(ymax = a$prop.approved + a$se, ymin = a$prop.approved - a$se)

a.plot <- ggplot(data=a, aes(x=treatment, y=prop.approved, fill=treatment)) + geom_bar(stat="identity") + geom_errorbar(se.bars, width=0.3) + 
           ylab("Proportion w/ Approved Project") + xlab("Treatment Status") + scale_fill_manual(values=c('#999999','#E69F00')) +
           theme(legend.position = "none", plot.title = element_text(face="bold")) + 
           annotate("text", Inf, Inf, 
                    label = paste("ATE = ",round(felm.ate[2],2)," (",round(felm.null.se[2],3),"), p = ",round(felm.p[2],2),sep = ""), hjust = 1, vjust = 1) + 
           ggtitle("Approved Project")


###Completeness of not_dispersed projects ----

dm.complete.con <- ri.diff(ri.obj = sub.c[sub.c$dispersed_status=="not_dispersed",], outcome.var = "complete", treat.var = "treat", treat.ri.var = "t.sub", info.cols = 95, sims=sims)

sub.c.nd$complete <- ifelse(sub.c.nd$proportion_observable=="complete" | sub.c.nd$proportion_observable=="mostly_comp", 1, 0)
nd <- data.frame(prop.table(table(sub.c.nd$treat,sub.c.nd$complete),1))[3:4,c(1,3)]
names(nd) <- c("treatment","prop.complete")
nd$treatment <- ifelse(nd$treatment==0,"control","treated")

#Computing standard errors by bootstrapping
sims <- 5000
ctl.means <- rep(NA, sims)
trt.means <- rep(NA, sims)
set.seed(201)
for (i in 1:sims){
  ctl.means[i] <- mean(sample(sub.c.nd$complete[sub.c.nd$treat==0], replace=T))
  trt.means[i] <- mean(sample(sub.c.nd$complete[sub.c.nd$treat==1], replace=T))
}
nd$se <- c(sd(ctl.means),sd(trt.means))

#Plotting
se.bars <- aes(ymax = nd$prop.complete + nd$se, ymin = nd$prop.complete - nd$se)

nd.plot <- ggplot(data=nd, aes(x=treatment, y=prop.complete, fill=treatment)) + geom_bar(stat="identity") + geom_errorbar(se.bars, width=0.3) + 
  ylab("Proportion Mostly or Fully Complete") + xlab("Treatment Status") + scale_fill_manual(values=c('#999999','#E69F00')) +
           theme(legend.position = "none", plot.title = element_text(face="bold")) + 
           annotate("text", Inf, Inf, 
                    label = paste("ATE = ",round(dm.complete.con$ate,2)," (",round(dm.complete.con$se.null,3),"), p = ",round(dm.complete.con$p.one.way.greater,2),
                                  sep = ""), hjust = 1, vjust = 1) + 
           ggtitle("Not Dispersed Completed")
#Note: not enough observations to use the data from "not_dispersed" projects in isolation


###Number of Pictures of Dispersed Projects ----

pic.dta <- sub.c.d %>% group_by(treat) %>% summarise(pics=mean(pics)) %>%
           mutate(treatment=case_when(treat==0 ~"control",treat==1~"treated"))

#Computing standard errors by bootstrapping
sims <- 5000
ctl.means <- rep(NA, sims)
trt.means <- rep(NA, sims)

set.seed(201)
for (i in 1:sims){
  ctl.means[i] <- mean(sample(sub.c.d$pics[sub.c.d$treat==0], replace=T))
  trt.means[i] <- mean(sample(sub.c.d$pics[sub.c.d$treat==1], replace=T))
}
pic.dta$se <- c(sd(ctl.means),sd(trt.means))

se.bars <- aes(ymax = pic.dta$pics + pic.dta$se, ymin = pic.dta$pics - pic.dta$se)

pic <- ggplot(data=pic.dta, aes(x=treatment, y=pics, fill=treatment)) +
  geom_bar(stat="identity", position=position_dodge())  + geom_errorbar(se.bars, width=0.3, position=position_dodge(0.9)) + 
  scale_fill_manual(values=c('#999999','#E69F00')) + ylab("Objects Shown at Audit (out of 10)") + xlab("Treatment Status") +
  theme(legend.position = "none", plot.title = element_text(face="bold")) + 
           annotate("text", Inf, Inf, 
                    label = paste("ATE = ",round(felm.ate[4],2)," (",round(felm.null.se[4],3),"), p = ",round(felm.p[4],2),sep = ""), hjust = 1, vjust = 1) + 
           ggtitle("Dispersed Objects Shown")


###Verifiability of all project components ----
ver.dta <- data.frame(table(sub.c$treat,sub.c$Verifiable))
names(ver.dta) <- c("treatment","ver.status","count")
ver.dta$treatment <- ifelse(ver.dta$treatment==0,"control","treated")
ver.dta$ver.status <- ifelse(ver.dta$ver.status=="not_ver","Not Verifiable",
                             ifelse(ver.dta$ver.status=="somewhat_ver","Somewhat Verifiable","Verifiable"))

#Computing standard errors by bootstrapping
sims <- 5000
not.ctl.means <- rep(NA, sims)
not.trt.means <- rep(NA, sims)
some.ctl.means <- rep(NA, sims)
some.trt.means <- rep(NA, sims)
ver.ctl.means <- rep(NA, sims)
ver.trt.means <- rep(NA, sims)

set.seed(201)
for (i in 1:sims){
  ctl.samp <- sample(sub.c$Verifiable[sub.c$treat==0], replace=T)
  trt.samp <- sample(sub.c$Verifiable[sub.c$treat==1], replace=T)
  
  not.ctl.means[i] <- sum(ctl.samp=="not_ver")
  not.trt.means[i] <- sum(trt.samp=="not_ver")
  some.ctl.means[i] <- sum(ctl.samp=="somewhat_ver")
  some.trt.means[i] <- sum(trt.samp=="somewhat_ver")
  ver.ctl.means[i] <- sum(ctl.samp=="verifiable")
  ver.trt.means[i] <- sum(trt.samp=="verifiable")
}
ver.dta$se <- c(sd(not.ctl.means),sd(not.trt.means),sd(some.ctl.means),sd(some.trt.means),sd(ver.ctl.means),sd(ver.trt.means))

se.bars <- aes(ymax = ver.dta$count + ver.dta$se, ymin = ver.dta$count - ver.dta$se)

ver <- ggplot(data=ver.dta, aes(x=ver.status, y=count, fill=treatment)) +
       geom_bar(stat="identity", position=position_dodge())  + geom_errorbar(se.bars, width=0.3, position=position_dodge(0.9)) + 
       scale_fill_manual(values=c('#999999','#E69F00')) + ylab("Count of Project Components") + xlab("Component Verifiable from Revenue Sharing") +
       ggtitle("Verifiability of Project Components") + theme(plot.title = element_text(face="bold"), legend.position = c(1,1), legend.justification=c(1,1))


#Plotting in a grid layout
lay <- rbind(c(1,2,3),
             c(4,5,5))

grid.arrange(f.plot,a.plot,nd.plot,pic,ver, layout_matrix = lay)

```

```{r tab-participation-spill, warning=FALSE, results="asis", message=FALSE}

#Participation Phase models, w/ village clustering
#e.participate.b11 #Opportunities to communicate with UWA
#e.participate.b10 #Opportunities to participate in planning
#e.participate.e4 #participated in planning meeting

mod.h9.1 <- felm(e.participate.b11 ~ treat + b.participate.b11 + E_subject_s_gender + E_Subject_s_age + E_Subject_s_income + E_Can_you_read_001 + subjects.per.village | subcounty.blocking | 0 | Updated.Village, data=M)

mod.h9.1.spl <- felm(e.participate.b11 ~ treat*S1 + b.participate.b11 + E_subject_s_gender + E_Subject_s_age + E_Subject_s_income + E_Can_you_read_001 + subjects.per.village | subcounty.blocking | 0 | Updated.Village, data=M)

mod.h8.1 <- felm(e.participate.b10 ~ treat + b.participate.b10 + E_subject_s_gender + E_Subject_s_age + E_Subject_s_income + E_Can_you_read_001 + subjects.per.village | subcounty.blocking | 0 | Updated.Village, data=M)

mod.h8.1.spl <- felm(e.participate.b10 ~ treat*S1 + b.participate.b10 + E_subject_s_gender + E_Subject_s_age + E_Subject_s_income + E_Can_you_read_001 + subjects.per.village | subcounty.blocking | 0 | Updated.Village, data=M)

mod.h8.3 <- felm(e.participate.e4 ~ treat + E_subject_s_gender + E_Subject_s_age + E_Subject_s_income + E_Can_you_read_001 + subjects.per.village | subcounty.blocking | 0 | Updated.Village, data=M)

mod.h8.3.spl <- felm(e.participate.e4 ~ treat*S1 + E_subject_s_gender + E_Subject_s_age + E_Subject_s_income + E_Can_you_read_001 + subjects.per.village | subcounty.blocking | 0 | Updated.Village, data=M)

stargazer(mod.h9.1,mod.h9.1.spl,mod.h8.1,mod.h8.1.spl,mod.h8.3,mod.h8.3.spl,
          keep=c("treat","S11","S12","treat:S11","treat:S12"),
          dep.var.labels = c("Opportunities to Communicate","Opportunities to Participate","Participated in Planning"),
          df = FALSE, omit.stat = c("rsq","ser"), ci=TRUE,
          star.cutoffs = NA,
          label = "tab-participation-spill",
          add.lines = list(c("Village fixed effects", "Yes", "Yes","Yes","Yes","Yes", "Yes"),
                           c("Covariates", "Yes", "Yes","Yes","Yes","Yes", "Yes")),
          covariate.labels = c("Treated","One Contiguous","Two Contiguous","Treated X One Contiguous","Treated X Two Contiguous"),
          title = "Treatment Effects for Participation Outcomes Considering Spillover",
          notes.append = FALSE, notes.label = "",
          notes = "\\parbox[t]{\\textwidth}{Notes: Confidence intervals are based on standard errors clustered at the village level.}",
          notes.align = "l",
          header = FALSE,
          float.env = "sidewaystable")


```

```{r tab-accountability-spill, warning=FALSE, results="asis", message=FALSE}

#source2("~/Google Drive/Bwindi project/Accountability phase/Code/Bwindi_Accountability.R", start=708, end=778)
#Note: this sources the setup for the spillover analysis, as of 4/11/19
#Note: the table from the JPAM submission is reformatted to match the previous chunk

#This is the list of villages with confirmed ordering from Jeremiah. See Slack, bwindi_papers, 2/22/19 for more information 
vlist.ordered.dta <- read.csv("BwindiVillageOrder_190201_JN.csv", stringsAsFactors=FALSE)
v_ids.ordered <- vlist.ordered.dta$accountability.id[!is.na(vlist.ordered.dta$accountability.id)]

#sort(v_ids.ordered) #all randomized villages accounted for 

con.mat <- matrix(0, nrow=93,ncol=93)
row.names(con.mat) <- v_ids.ordered
colnames(con.mat) <- v_ids.ordered
delta <- row(con.mat) - col(con.mat)

con.mat[abs(delta)==1 | abs(delta)==92] <- 1 #changing all directly contiguous to value == 1

#These are objects for further robustness check if necessary, per PAP
con2.mat <- con.mat
con2.mat[abs(delta)>0 & (abs(delta)<=2 | abs(delta)>=91)] <- 1 #changing all contiguous w/in two villages to value == 1

con3.mat <- con.mat
con3.mat[abs(delta)>0 & (abs(delta)<=3 | abs(delta)>=90)] <- 1 #changing all contiguous w/in two villages to value == 1

treated.villages <- p2.assigned$village.id[p2.assigned$treat2==1]

#Modified from WD replication file, Participation Phase
for (i in 1:nrow(audit.c.ri)){
  row.test <- con.mat[audit.c.ri$village_id[i]==row.names(con.mat),]==1
  row.test2 <- con2.mat[audit.c.ri$village_id[i]==row.names(con2.mat),]==1
  row.test3 <- con3.mat[audit.c.ri$village_id[i]==row.names(con3.mat),]==1
  
  con.villages <- row.names(con.mat)[row.test]
  con2.villages <- row.names(con2.mat)[row.test2]
  con3.villages <- row.names(con3.mat)[row.test3]
  
  audit.c.ri$S1[i] <- sum(con.villages %in% treated.villages)
  audit.c.ri$S2[i] <- sum(con2.villages %in% treated.villages)
  audit.c.ri$S3[i] <- sum(con3.villages %in% treated.villages)
}

audit.c.ri$S1 <- as.factor(audit.c.ri$S1)
audit.c.ri$S2 <- as.factor(audit.c.ri$S2)
audit.c.ri$S3 <- as.factor(audit.c.ri$S3)

#Remaking sub.c object from above
sub.c <- subset(audit.c.ri, shared==0)
sub.c$finished <- ifelse(sub.c$implementation_status=="finished", 1, 0)
sub.c$approved.imp <- ifelse(sub.c$project_part_approved=="approved" & sub.c$implementation_status!="not_start_exp" & sub.c$implementation_status!="not_start_not_", 1, 0)
sub.c$complete <- ifelse(sub.c$proportion_observable=="complete" | sub.c$proportion_observable=="mostly_comp", 1, 0)
sub.c$ver <- ifelse(sub.c$Verifiable=="verifiable", 1, 0)
sub.c$somewhat.ver <- ifelse(sub.c$Verifiable=="somewhat_ver", 1, 0)
sub.c$not.ver <- ifelse(sub.c$Verifiable=="not_ver", 1, 0)

#Models using lm_robust(), S1
fin <- lm_robust(finished ~ treat*S1, data=sub.c, clusters = sub.c$village.id, fixed_effects = sub.c$sc.block.y)
app <- lm_robust(approved.imp ~ treat*S1, data=sub.c, clusters = sub.c$village.id, fixed_effects = sub.c$sc.block.y)
comp <- lm_robust(complete ~ treat*S1, data=sub.c[sub.c$dispersed_status=="not_dispersed",], 
                  clusters = sub.c$village.id) #block fixed-effect not possible with spillover indicator, dropping
pics <- lm_robust(pics ~ treat*S1, data=sub.c[sub.c$dispersed_status=="dispersed",], 
                  clusters = sub.c$village.id, fixed_effects = sub.c[sub.c$dispersed_status=="dispersed",]$sc.block.y)
ver <- lm_robust(ver ~ treat*S1, data=sub.c, clusters = sub.c$village.id, fixed_effects = sub.c$sc.block.y)
somewhat_ver <- lm_robust(somewhat.ver ~ treat*S1, data=sub.c, clusters = sub.c$village.id, fixed_effects = sub.c$sc.block.y)
#result: none of the models show moderation of treatment effect by spillover

#Tricking stargazer() to produce table with estimatr() output.
#Instructions at: https://declaredesign.org/r/estimatr/articles/regression-tables.html
fin <- lm(finished ~ treat*S1 + sc.block.y, data=sub.c)
app <- lm(approved.imp ~ treat*S1 + sc.block.y, data=sub.c)
comp <- lm(complete ~ treat*S1, data=sub.c[sub.c$dispersed_status=="not_dispersed",])
pics <- lm(pics ~ treat*S1 + sc.block.y, data=sub.c[sub.c$dispersed_status=="dispersed",])
ver <- lm(ver ~ treat*S1 + sc.block.y, data=sub.c)
somewhat_ver <- lm(somewhat.ver ~ treat*S1 + sc.block.y, data=sub.c)


stargazer(fin,app,comp,pics,ver,somewhat_ver, type="latex",
          keep = c("treat1","S11","S12","treat1:S11","treat1:S12"),
          dep.var.caption  = NULL,
          dep.var.labels = c("\\shortstack{Implementation\\\\Finished}","\\shortstack{Approved\\\\Project}","\\shortstack{Not Dispersed\\\\ Completed}","\\shortstack{Dispersed\\\\Objects Shown}",
                             "Verifiable","\\shortstack{Somewhat\\\\Verifiable}"),
          covariate.labels = c("Treated","One Contiguous","Two Contiguous","Treated X One Contiguous","Treated X Two Contiguous"),
          se = starprep(fin,app,comp,pics,ver,somewhat_ver, clusters = sub.c$village.id),
          ci.custom = starprep(fin,app,comp,pics,ver,somewhat_ver, stat = "ci"),
          report = c('vcs'), df = FALSE, omit.stat = c("rsq","ser","F"),
          notes.append = FALSE, notes.label = "", 
          notes = "\\parbox[t]{\\textwidth}{Notes: Confidence intervals are based on standard errors clustered at the village level.}",
          notes.align = "l",
          star.cutoffs = NA,
          label = "tab-accountability-spill",
          add.lines = list(c("Block fixed effects", "Yes", "Yes","Yes","Yes","Yes", "Yes"),
                           c("Covariates", "No", "No","No","No","No","No")),
          title = "Treatment Effects for Village-Level Accountability Outcomes Considering Spillover",
          header = FALSE,
          float.env = "sidewaystable"
) 

```